From: iap10@labyrinth.cl.cam.ac.uk Date: Tue, 30 Mar 2004 21:30:23 +0000 (+0000) Subject: bitkeeper revision 1.832 (4069e6efDAxnvoZE5ananXkWGDiyig) X-Git-Tag: archive/raspbian/4.8.0-1+rpi1~1^2~18289 X-Git-Url: https://dgit.raspbian.org/%22http:/www.example.com/cgi/%22https:/%22bookmarks://%22/%22http:/www.example.com/cgi/%22https:/%22bookmarks:/%22?a=commitdiff_plain;h=8ee6fdf87ec16e75b6b03e2ebe0f9d7bdc3be817;p=xen.git bitkeeper revision 1.832 (4069e6efDAxnvoZE5ananXkWGDiyig) shadow tables code refactoring stage 1 --- diff --git a/xen/common/shadow.c b/xen/common/shadow.c index 600f8211f4..8704151799 100644 --- a/xen/common/shadow.c +++ b/xen/common/shadow.c @@ -1,4 +1,4 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- */ +/* -*- Mode:C++; c-set-style:BSD; c-basic-offset:4; tab-width:4 -*- */ #include #include @@ -30,19 +30,18 @@ static inline void free_shadow_page( struct mm_struct *m, struct pfn_info *pfn_info ) { unsigned long flags; - unsigned long type = pfn_info->type_and_flags & PGT_type_mask; + unsigned long type = pfn_info->type_and_flags & PGT_type_mask; - m->shadow_page_count--; + m->shadow_page_count--; - if (type == PGT_l1_page_table) + if (type == PGT_l1_page_table) perfc_decr(shadow_l1_pages); else if (type == PGT_l2_page_table) perfc_decr(shadow_l2_pages); - else printk("Free shadow weird page type pfn=%08x type=%08lx\n", + else printk("Free shadow weird page type pfn=%08x type=%08lx\n", frame_table-pfn_info, pfn_info->type_and_flags); - - pfn_info->type_and_flags = 0; + pfn_info->type_and_flags = 0; spin_lock_irqsave(&free_list_lock, flags); list_add(&pfn_info->list, &free_list); @@ -52,210 +51,218 @@ static inline void free_shadow_page( struct mm_struct *m, static void __free_shadow_table( struct mm_struct *m ) { - int j, free=0; - struct shadow_status *a,*next; + int j, free=0; + struct shadow_status *a,*next; - // the code assumes you're not using the page tables i.e. + // the code assumes you're not using the page tables i.e. 
// the domain is stopped and cr3 is something else!! // walk the hash table and call free_shadow_page on all pages - shadow_audit(m,1); + shadow_audit(m,1); for(j=0;jshadow_ht[j]; - if (a->pfn) - { - free_shadow_page( m, - &frame_table[a->spfn_and_flags & PSH_pfn_mask] ); - a->pfn = 0; - a->spfn_and_flags = 0; - free++; + { + a = &m->shadow_ht[j]; + if (a->pfn) + { + free_shadow_page( m, + &frame_table[a->spfn_and_flags & PSH_pfn_mask] ); + a->pfn = 0; + a->spfn_and_flags = 0; + free++; } - next=a->next; - a->next=NULL; - a=next; - while(a) + next=a->next; + a->next=NULL; + a=next; + while(a) { - struct shadow_status *next = a->next; - - free_shadow_page( m, - &frame_table[a->spfn_and_flags & PSH_pfn_mask] ); - a->pfn = 0; - a->spfn_and_flags = 0; - free++; - a->next = m->shadow_ht_free; - m->shadow_ht_free = a; - a=next; + struct shadow_status *next = a->next; + + free_shadow_page( m, + &frame_table[a->spfn_and_flags & PSH_pfn_mask] ); + a->pfn = 0; + a->spfn_and_flags = 0; + free++; + a->next = m->shadow_ht_free; + m->shadow_ht_free = a; + a=next; } - shadow_audit(m,0); - } - SH_LOG("Free shadow table. Freed= %d",free); + shadow_audit(m,0); + } + SH_LOG("Free shadow table. 
Freed= %d",free); } static inline int shadow_page_op( struct mm_struct *m, unsigned int op, - struct pfn_info *spfn_info ) + struct pfn_info *spfn_info ) { int work = 0; unsigned int spfn = spfn_info-frame_table; switch( op ) { - case DOM0_SHADOW_CONTROL_OP_CLEAN: - { - int i; - if ( (spfn_info->type_and_flags & PGT_type_mask) == - PGT_l1_page_table ) - { - unsigned long * spl1e = map_domain_mem( spfn<type_and_flags & PGT_type_mask) == + PGT_l1_page_table ) + { + unsigned long * spl1e = map_domain_mem( spfn<shadow_ht[j]; - if (a->pfn) + a = &m->shadow_ht[j]; + if (a->pfn) { - work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] ); + work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] ); } - a=a->next; - while(a) + a=a->next; + while(a) { - work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] ); - a=a->next; + work += shadow_page_op( m, op, &frame_table[a->spfn_and_flags & PSH_pfn_mask] ); + a=a->next; } - shadow_audit(m,0); - } - SH_LOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages)); + shadow_audit(m,0); + } + SH_LOG("Scan shadow table. Work=%d l1=%d l2=%d", work, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages)); } int shadow_mode_enable( struct task_struct *p, unsigned int mode ) { struct mm_struct *m = &p->mm; - struct shadow_status **fptr; - int i; + struct shadow_status **fptr; + int i; - spin_lock_init(&m->shadow_lock); - spin_lock(&m->shadow_lock); + spin_lock_init(&m->shadow_lock); + spin_lock(&m->shadow_lock); m->shadow_mode = mode; - // allocate hashtable + // allocate hashtable m->shadow_ht = kmalloc( shadow_ht_buckets * - sizeof(struct shadow_status), GFP_KERNEL ); - if( ! m->shadow_ht ) + sizeof(struct shadow_status), GFP_KERNEL ); + if( ! 
m->shadow_ht ) goto nomem; - memset( m->shadow_ht, 0, shadow_ht_buckets * - sizeof(struct shadow_status) ); + memset( m->shadow_ht, 0, shadow_ht_buckets * + sizeof(struct shadow_status) ); - // allocate space for first lot of extra nodes + // allocate space for first lot of extra nodes m->shadow_ht_extras = kmalloc( sizeof(void*) + (shadow_ht_extra_size * - sizeof(struct shadow_status)), GFP_KERNEL ); + sizeof(struct shadow_status)), GFP_KERNEL ); - if( ! m->shadow_ht_extras ) + if( ! m->shadow_ht_extras ) goto nomem; - memset( m->shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size * - sizeof(struct shadow_status)) ); + memset( m->shadow_ht_extras, 0, sizeof(void*) + (shadow_ht_extra_size * + sizeof(struct shadow_status)) ); m->shadow_extras_count++; // add extras to free list - fptr = &m->shadow_ht_free; - for ( i=0; ishadow_ht_free; + for ( i=0; ishadow_ht_extras[i]; fptr = &(m->shadow_ht_extras[i].next); - } - *fptr = NULL; - *((struct shadow_status ** ) - &m->shadow_ht_extras[shadow_ht_extra_size]) = NULL; + } + *fptr = NULL; + *((struct shadow_status ** ) + &m->shadow_ht_extras[shadow_ht_extra_size]) = NULL; if ( mode == SHM_logdirty ) { - m->shadow_dirty_bitmap = kmalloc( p->max_pages/8, GFP_KERNEL ); - if( !m->shadow_dirty_bitmap ) goto nomem; - memset(m->shadow_dirty_bitmap,0,p->max_pages/8); + m->shadow_dirty_bitmap_size = (p->max_pages+63)&(~63); + m->shadow_dirty_bitmap = + kmalloc( m->shadow_dirty_bitmap_size/8, GFP_KERNEL ); + + if( !m->shadow_dirty_bitmap ) + { + m->shadow_dirty_bitmap_size = 0; + goto nomem; + } + memset(m->shadow_dirty_bitmap,0,m->shadow_dirty_bitmap_size/8); } - spin_unlock(&m->shadow_lock); + spin_unlock(&m->shadow_lock); // call shadow_mk_pagetable - shadow_mk_pagetable( m ); + shadow_mk_pagetable( m ); - return 0; + return 0; nomem: - spin_unlock(&m->shadow_lock); - return -ENOMEM; + spin_unlock(&m->shadow_lock); + return -ENOMEM; } static void shadow_mode_disable( struct task_struct *p ) { struct mm_struct *m = &p->mm; - 
struct shadow_status *next; + struct shadow_status *next; spin_lock(&m->shadow_lock); - __free_shadow_table( m ); - m->shadow_mode = 0; - spin_unlock(&m->shadow_lock); + __free_shadow_table( m ); + m->shadow_mode = 0; + spin_unlock(&m->shadow_lock); - SH_LOG("freed tables count=%d l1=%d l2=%d", + SH_LOG("freed tables count=%d l1=%d l2=%d", m->shadow_page_count, perfc_value(shadow_l1_pages), perfc_value(shadow_l2_pages)); - next = m->shadow_ht_extras; - while( next ) + next = m->shadow_ht_extras; + while( next ) { struct shadow_status * this = next; m->shadow_extras_count--; next = *((struct shadow_status **)(&next[shadow_ht_extra_size])); kfree( this ); - } + } - SH_LOG("freed extras, now %d", m->shadow_extras_count); + SH_LOG("freed extras, now %d", m->shadow_extras_count); if( m->shadow_dirty_bitmap ) { - kfree( m->shadow_dirty_bitmap ); + kfree( m->shadow_dirty_bitmap ); m->shadow_dirty_bitmap = 0; + m->shadow_dirty_bitmap_size = 0; } // free the hashtable itself - kfree( &m->shadow_ht[0] ); + kfree( &m->shadow_ht[0] ); } static void shadow_mode_table_op( struct task_struct *p, unsigned int op ) { - struct mm_struct *m = &p->mm; + struct mm_struct *m = &p->mm; // since Dom0 did the hypercall, we should be running with it's page // tables right now. 
Calling flush on yourself would be really @@ -263,8 +270,8 @@ static void shadow_mode_table_op( struct task_struct *p, unsigned int op ) if ( m == &current->mm ) { - printk("Don't try and flush your own page tables!\n"); - return; + printk("Don't try and flush your own page tables!\n"); + return; } @@ -277,17 +284,17 @@ static void shadow_mode_table_op( struct task_struct *p, unsigned int op ) switch(op) { case DOM0_SHADOW_CONTROL_OP_FLUSH: - __free_shadow_table( m ); - break; + __free_shadow_table( m ); + break; case DOM0_SHADOW_CONTROL_OP_CLEAN: - __scan_shadow_table( m, op ); - if( m->shadow_dirty_bitmap ) - memset(m->shadow_dirty_bitmap,0,p->max_pages/8); - break; + __scan_shadow_table( m, op ); + if( m->shadow_dirty_bitmap ) + memset(m->shadow_dirty_bitmap,0,m->shadow_dirty_bitmap_size/8); + break; } - spin_unlock(&m->shadow_lock); + spin_unlock(&m->shadow_lock); SH_LOG("shadow mode table op : page count %d", m->shadow_page_count); @@ -305,140 +312,214 @@ int shadow_mode_control( struct task_struct *p, unsigned int op ) // don't call if already shadowed... 
- // sychronously stop domain + // sychronously stop domain if( 0 && !(p->state & TASK_STOPPED) && !(p->state & TASK_PAUSED)) { - printk("about to pause domain\n"); - sched_pause_sync(p); - printk("paused domain\n"); - we_paused = 1; + printk("about to pause domain\n"); + sched_pause_sync(p); + printk("paused domain\n"); + we_paused = 1; } - if ( p->mm.shadow_mode && op == DOM0_SHADOW_CONTROL_OP_OFF ) + if ( p->mm.shadow_mode && op == DOM0_SHADOW_CONTROL_OP_OFF ) { shadow_mode_disable(p); - } - else if ( op == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST ) - { - if(p->mm.shadow_mode) shadow_mode_disable(p); - shadow_mode_enable(p, SHM_test); - } - else if ( p->mm.shadow_mode && op >= DOM0_SHADOW_CONTROL_OP_FLUSH && op<=DOM0_SHADOW_CONTROL_OP_CLEAN ) + } + else if ( op == DOM0_SHADOW_CONTROL_OP_ENABLE_TEST ) + { + if(p->mm.shadow_mode) shadow_mode_disable(p); + shadow_mode_enable(p, SHM_test); + } + else if ( p->mm.shadow_mode && op >= DOM0_SHADOW_CONTROL_OP_FLUSH && op<=DOM0_SHADOW_CONTROL_OP_CLEAN ) { shadow_mode_table_op(p, op); } - else + else { - if ( we_paused ) wake_up(p); + if ( we_paused ) wake_up(p); return -EINVAL; } if ( we_paused ) wake_up(p); - return 0; + return 0; } static inline struct pfn_info *alloc_shadow_page( struct mm_struct *m ) { - m->shadow_page_count++; + m->shadow_page_count++; + + return alloc_domain_page( NULL ); +} + +/************************************************************************/ + +static inline void l1pte_write_fault( struct mm_struct *m, + unsigned long *gpte_p, unsigned long *spte_p ) +{ + unsigned long gpte = *gpte_p; + unsigned long spte = *spte_p; + + switch( m->shadow_mode ) + { + case SHM_test: + spte = gpte; + gpte |= _PAGE_DIRTY | _PAGE_ACCESSED; + spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; + break; + + case SHM_logdirty: + break; + } - return alloc_domain_page( NULL ); + *gpte_p = gpte; + *spte_p = spte; } +static inline void l1pte_read_fault( struct mm_struct *m, + unsigned long *gpte_p, unsigned long *spte_p ) +{ 
+ unsigned long gpte = *gpte_p; + unsigned long spte = *spte_p; + + switch( m->shadow_mode ) + { + case SHM_test: + spte = gpte; + gpte |= _PAGE_ACCESSED; + spte |= _PAGE_ACCESSED; + if ( ! (gpte & _PAGE_DIRTY ) ) + spte &= ~ _PAGE_RW; + break; + + case SHM_logdirty: + break; + } + *gpte_p = gpte; + *spte_p = spte; +} + +static inline void l1pte_no_fault( struct mm_struct *m, + unsigned long *gpte_p, unsigned long *spte_p ) +{ + unsigned long gpte = *gpte_p; + unsigned long spte = *spte_p; + + switch( m->shadow_mode ) + { + case SHM_test: + spte = 0; + if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == + (_PAGE_PRESENT|_PAGE_ACCESSED) ) + { + if ( ! (gpte & _PAGE_DIRTY ) ) + spte &= ~ _PAGE_RW; + } + break; + + case SHM_logdirty: + break; + } + + *gpte_p = gpte; + *spte_p = spte; +} + +/*********************************************************************/ void unshadow_table( unsigned long gpfn, unsigned int type ) { - unsigned long spfn; + unsigned long spfn; SH_VLOG("unshadow_table type=%08x gpfn=%08lx", - type, - gpfn ); + type, + gpfn ); - perfc_incrc(unshadow_table_count); + perfc_incrc(unshadow_table_count); - // this function is the same for both l1 and l2 tables + // this function is the same for both l1 and l2 tables - // even in the SMP guest case, there won't be a race here as + // even in the SMP guest case, there won't be a race here as // this CPU was the one that cmpxchg'ed the page to invalid - spfn = __shadow_status(¤t->mm, gpfn) & PSH_pfn_mask; + spfn = __shadow_status(¤t->mm, gpfn) & PSH_pfn_mask; - delete_shadow_status(¤t->mm, gpfn); + delete_shadow_status(¤t->mm, gpfn); #if 0 // XXX leave as might be useful for later debugging - { + { int i; unsigned long * spl1e = map_domain_mem( spfn<mm, &frame_table[spfn] ); + free_shadow_page( ¤t->mm, &frame_table[spfn] ); } unsigned long shadow_l2_table( - struct mm_struct *m, unsigned long gpfn ) + struct mm_struct *m, unsigned long gpfn ) { - struct pfn_info *spfn_info; - unsigned long spfn; - 
l2_pgentry_t *spl2e, *gpl2e; - int i; + struct pfn_info *spfn_info; + unsigned long spfn; + l2_pgentry_t *spl2e, *gpl2e; + int i; - SH_VVLOG("shadow_l2_table( %08lx )",gpfn); + SH_VVLOG("shadow_l2_table( %08lx )",gpfn); - perfc_incrc(shadow_l2_table_count); + perfc_incrc(shadow_l2_table_count); // XXX in future, worry about racing in SMP guests // -- use cmpxchg with PSH_pending flag to show progress (and spin) - spfn_info = alloc_shadow_page(m); + spfn_info = alloc_shadow_page(m); ASSERT( spfn_info ); // XXX deal with failure later e.g. blow cache - spfn_info->type_and_flags = PGT_l2_page_table; - perfc_incr(shadow_l2_pages); + spfn_info->type_and_flags = PGT_l2_page_table; + perfc_incr(shadow_l2_pages); - spfn = (unsigned long) (spfn_info - frame_table); + spfn = (unsigned long) (spfn_info - frame_table); - // mark pfn as being shadowed, update field to point at shadow - set_shadow_status(m, gpfn, spfn | PSH_shadowed); + // mark pfn as being shadowed, update field to point at shadow + set_shadow_status(m, gpfn, spfn | PSH_shadowed); - // we need to do this before the linear map is set up - spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT); + // we need to do this before the linear map is set up + spl2e = (l2_pgentry_t *) map_domain_mem(spfn << PAGE_SHIFT); - // get hypervisor and 2x linear PT mapings installed - memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], - &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE], - HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t)); + // get hypervisor and 2x linear PT mapings installed + memcpy(&spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], + &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE], + HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t)); spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR); + mk_l2_pgentry((gpfn << PAGE_SHIFT) | __PAGE_HYPERVISOR); spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry((spfn << PAGE_SHIFT) | 
__PAGE_HYPERVISOR); + mk_l2_pgentry((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR); spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = - mk_l2_pgentry(__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) | - __PAGE_HYPERVISOR); + mk_l2_pgentry(__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) | + __PAGE_HYPERVISOR); - // can't use the linear map as we may not be in the right PT - gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT); + // can't use the linear map as we may not be in the right PT + gpl2e = (l2_pgentry_t *) map_domain_mem(gpfn << PAGE_SHIFT); - // proactively create entries for pages that are already shadowed - for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) - { + // proactively create entries for pages that are already shadowed + for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) + { unsigned long spte = 0; #if 0 // Turns out this doesn't really help - unsigned long gpte; + unsigned long gpte; - gpte = l2_pgentry_val(gpl2e[i]); + gpte = l2_pgentry_val(gpl2e[i]); if (gpte & _PAGE_PRESENT) { @@ -450,203 +531,183 @@ unsigned long shadow_l2_table( if ( unlikely( (__shadow_status(p, gpte>>PAGE_SHIFT) & PGT_type_mask) == PGT_l2_page_table) ) { printk("Linear mapping detected\n"); - spte = gpte & ~_PAGE_RW; + spte = gpte & ~_PAGE_RW; } else { - spte = ( gpte & ~PAGE_MASK ) | (s_sh< %08lx)",gpfn,spfn); + SH_VLOG("shadow_l2_table( %08lx -> %08lx)",gpfn,spfn); - return spfn; + return spfn; } int shadow_fault( unsigned long va, long error_code ) { - unsigned long gpte, spte; + unsigned long gpte, spte; + struct mm_struct *m = ¤t->mm; - SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code ); + SH_VVLOG("shadow_fault( va=%08lx, code=%ld )", va, error_code ); check_pagetable( current, current->mm.pagetable, "pre-sf" ); - if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) ) - { + if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) ) + { SH_VVLOG("shadow_fault - EXIT: read gpte 
faulted" ); return 0; // propagate to guest - } + } - if ( ! (gpte & _PAGE_PRESENT) ) - { + if ( ! (gpte & _PAGE_PRESENT) ) + { SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte ); return 0; // we're not going to be able to help } if ( (error_code & 2) && ! (gpte & _PAGE_RW) ) { - // write fault on RO page - return 0; - } + // write fault on RO page + return 0; + } spin_lock(¤t->mm.shadow_lock); // take the lock and reread gpte - if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) ) - { + if ( unlikely(__get_user(gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) ) + { SH_VVLOG("shadow_fault - EXIT: read gpte faulted" ); - spin_unlock(¤t->mm.shadow_lock); + spin_unlock(&m->shadow_lock); return 0; // propagate to guest - } + } - if ( unlikely(!(gpte & _PAGE_PRESENT)) ) - { + if ( unlikely(!(gpte & _PAGE_PRESENT)) ) + { SH_VVLOG("shadow_fault - EXIT: gpte not present (%lx)",gpte ); - spin_unlock(¤t->mm.shadow_lock); + spin_unlock(&m->shadow_lock); return 0; // we're not going to be able to help } - spte = gpte; - - if ( error_code & 2 ) - { // write fault + if ( error_code & 2 ) + { // write fault if ( likely(gpte & _PAGE_RW) ) - { - gpte |= _PAGE_DIRTY | _PAGE_ACCESSED; - spte |= _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED; - // (we're about to dirty it anyhow...) + { + l1pte_write_fault( m, &gpte, &spte ); } else { // write fault on RO page - SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte ); - spin_unlock(¤t->mm.shadow_lock); + SH_VVLOG("shadow_fault - EXIT: write fault on RO page (%lx)",gpte ); + spin_unlock(&m->shadow_lock); return 0; // propagate to guest // not clear whether we should set accessed bit here... } - } - else - { - gpte |= _PAGE_ACCESSED; - spte |= _PAGE_ACCESSED; // about to happen anyway - if ( ! 
(gpte & _PAGE_DIRTY) ) - spte &= ~_PAGE_RW; // force clear unless already dirty - } + } + else + { + l1pte_read_fault( m, &gpte, &spte ); + } - SH_VVLOG("plan: gpte=%08lx spte=%08lx", gpte, spte ); + SH_VVLOG("plan: gpte=%08lx spte=%08lx", gpte, spte ); - // write back updated gpte + // write back updated gpte // XXX watch out for read-only L2 entries! (not used in Linux) - if ( unlikely( __put_user( gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) ) + if ( unlikely( __put_user( gpte, (unsigned long*)&linear_pg_table[va>>PAGE_SHIFT])) ) BUG(); // fixme! if ( unlikely( __put_user( spte, (unsigned long*)&shadow_linear_pg_table[va>>PAGE_SHIFT])) ) - { + { // failed: - // the L1 may not be shadowed, or the L2 entry may be insufficient + // the L1 may not be shadowed, or the L2 entry may be insufficient unsigned long gpde, spde, gl1pfn, sl1pfn; - SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx spte=%08lx",gpte,spte ); + SH_VVLOG("3: not shadowed or l2 insufficient gpte=%08lx spte=%08lx",gpte,spte ); - gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]); + gpde = l2_pgentry_val(linear_l2_table[va>>L2_PAGETABLE_SHIFT]); - gl1pfn = gpde>>PAGE_SHIFT; + gl1pfn = gpde>>PAGE_SHIFT; - if ( ! (sl1pfn=__shadow_status(¤t->mm, gl1pfn) ) ) + if ( ! 
(sl1pfn=__shadow_status(¤t->mm, gl1pfn) ) ) { - // this L1 is NOT already shadowed so we need to shadow it - struct pfn_info *sl1pfn_info; - unsigned long *gpl1e, *spl1e; - int i; - sl1pfn_info = alloc_shadow_page( ¤t->mm ); - sl1pfn_info->type_and_flags = PGT_l1_page_table; + // this L1 is NOT already shadowed so we need to shadow it + struct pfn_info *sl1pfn_info; + unsigned long *gpl1e, *spl1e; + int i; + sl1pfn_info = alloc_shadow_page( ¤t->mm ); + sl1pfn_info->type_and_flags = PGT_l1_page_table; - sl1pfn = sl1pfn_info - frame_table; + sl1pfn = sl1pfn_info - frame_table; - SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn); - perfc_incrc(shadow_l1_table_count); - perfc_incr(shadow_l1_pages); + SH_VVLOG("4a: l1 not shadowed ( %08lx )",sl1pfn); + perfc_incrc(shadow_l1_table_count); + perfc_incr(shadow_l1_pages); - set_shadow_status(¤t->mm, gl1pfn, PSH_shadowed | sl1pfn); + set_shadow_status(¤t->mm, gl1pfn, PSH_shadowed | sl1pfn); - gpde = gpde | _PAGE_ACCESSED | _PAGE_DIRTY; - spde = (gpde & ~PAGE_MASK) | _PAGE_RW | (sl1pfn<>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde); - shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde); + linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde); + shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde); - gpl1e = (unsigned long *) &(linear_pg_table[ - (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]); + gpl1e = (unsigned long *) &(linear_pg_table[ + (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]); - spl1e = (unsigned long *) &shadow_linear_pg_table[ - (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]; + spl1e = (unsigned long *) &shadow_linear_pg_table[ + (va>>PAGE_SHIFT) & ~(ENTRIES_PER_L1_PAGETABLE-1) ]; - // XXX can only do this is the shadow/guest is writeable - // disable write protection if ! gpde & _PAGE_RW ???? 
- - for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) - { -#if SHADOW_OPTIMISE - if ( (gpl1e[i] & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == - (_PAGE_PRESENT|_PAGE_ACCESSED) ) - { - spl1e[i] = gpl1e[i]; - if ( !(gpl1e[i] & _PAGE_DIRTY) ) - spl1e[i] &= ~_PAGE_RW; - } - else -#endif - spl1e[i] = 0; + for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) + { + l1pte_no_fault( m, &gpl1e[i], &spl1e[i] ); } } - else + else { - // this L1 was shadowed (by another PT) but we didn't have an L2 - // entry for it + // this L1 was shadowed (by another PT) but we didn't have an L2 + // entry for it - SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn); + SH_VVLOG("4b: was shadowed, l2 missing ( %08lx )",sl1pfn); - spde = (gpde & ~PAGE_MASK) | (sl1pfn<>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde); - shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde); + linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(gpde); + shadow_linear_l2_table[va>>L2_PAGETABLE_SHIFT] = mk_l2_pgentry(spde); - } - shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte); - // (we need to do the above even if we've just made the shadow L1) + shadow_linear_pg_table[va>>PAGE_SHIFT] = mk_l1_pgentry(spte); + // (we need to do the above even if we've just made the shadow L1) } // end of fixup writing the shadow L1 directly failed @@ -654,7 +715,7 @@ int shadow_fault( unsigned long va, long error_code ) check_pagetable( current, current->mm.pagetable, "post-sf" ); - spin_unlock(&current->mm.shadow_lock); + spin_unlock(&m->shadow_lock); return 1; // let's try the faulting instruction again... 
@@ -663,14 +724,14 @@ int shadow_fault( unsigned long va, long error_code ) void shadow_l1_normal_pt_update( unsigned long pa, unsigned long gpte, unsigned long *prev_spfn_ptr, - l1_pgentry_t **prev_spl1e_ptr ) + l1_pgentry_t **prev_spl1e_ptr ) { unsigned long gpfn, spfn, spte, prev_spfn = *prev_spfn_ptr; l1_pgentry_t * spl1e, * prev_spl1e = *prev_spl1e_ptr; -SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%p\n", -pa,gpte,prev_spfn, prev_spl1e); + SH_VVLOG("shadow_l1_normal_pt_update pa=%08lx, gpte=%08lx, prev_spfn=%08lx, prev_spl1e=%p\n", + pa,gpte,prev_spfn, prev_spl1e); // to get here, we know the l1 page *must* be shadowed @@ -679,34 +740,24 @@ pa,gpte,prev_spfn, prev_spl1e); if ( spfn == prev_spfn ) { - spl1e = prev_spl1e; + spl1e = prev_spl1e; } else { - if( prev_spl1e ) unmap_domain_mem( prev_spl1e ); - spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT ); - *prev_spfn_ptr = spfn; - *prev_spl1e_ptr = spl1e; + if( prev_spl1e ) unmap_domain_mem( prev_spl1e ); + spl1e = (l1_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT ); + *prev_spfn_ptr = spfn; + *prev_spl1e_ptr = spl1e; } - // XXX we assume only pagetables can be shadowed; this will have to change - // to allow arbitrary CoW etc. - spte = 0; + // XXX we assume only pagetables can be shadowed; + // this will have to change to allow arbitrary CoW etc. 
+ + l1pte_no_fault( &current->mm, &gpte, &spte ); -#if SHADOW_OPTIMISE - if ( (gpte & (_PAGE_PRESENT|_PAGE_ACCESSED) ) == - (_PAGE_PRESENT|_PAGE_ACCESSED) ) - { - spte = gpte; - if ( !(gpte & _PAGE_DIRTY ) ) - gpte &= ~ _PAGE_RW; - } -#endif - spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t) ] = - mk_l1_pgentry( spte ); + spl1e[(pa & ~PAGE_MASK) / sizeof(l1_pgentry_t) ] = mk_l1_pgentry( spte ); - //unmap_domain_mem( (void *) spl1e ); } void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte ) @@ -725,30 +776,30 @@ void shadow_l2_normal_pt_update( unsigned long pa, unsigned long gpte ) spte = 0; - if( gpte & _PAGE_PRESENT ) + if( gpte & _PAGE_PRESENT ) s_sh = __shadow_status(&current->mm, gpte >> PAGE_SHIFT); sp2le = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT ); // no real need for a cache here - if ( s_sh ) // PSH_shadowed - { + if ( s_sh ) // PSH_shadowed + { if ( unlikely( (frame_table[gpte>>PAGE_SHIFT].type_and_flags & PGT_type_mask) == PGT_l2_page_table) ) { - // linear page table case + // linear page table case spte = (gpte & ~_PAGE_RW) | _PAGE_DIRTY | _PAGE_ACCESSED; - } - else + } + else spte = (gpte & ~PAGE_MASK) | (s_sh<>PAGE_SHIFT; - gpfn = gpte>>PAGE_SHIFT; + spfn = spte>>PAGE_SHIFT; + gpfn = gpte>>PAGE_SHIFT; - if ( gpfn == spfn ) + if ( gpfn == spfn ) { if ( level > 1 ) FAIL("Linear map ???"); // XXX this will fail on BSD return 1; - } - else - { + } + else + { if ( level < 2 ) FAIL("Shadow in L1 entry?"); if ( __shadow_status(p, gpfn) != (PSH_shadowed | spfn) ) FAIL("spfn problem g.sf=%08lx", __shadow_status(p, gpfn) ); - } + } - return 1; + return 1; } static int check_l1_table( struct mm_struct *m, unsigned long va, - unsigned long g2, unsigned long s2 ) + unsigned long g2, unsigned long s2 ) { - int j; - unsigned long *gpl1e, *spl1e; + int j; + unsigned long *gpl1e, *spl1e; - //gpl1e = (unsigned long *) &(linear_pg_table[ va>>PAGE_SHIFT]); - //spl1e = (unsigned long *) &(shadow_linear_pg_table[ va>>PAGE_SHIFT]); + //gpl1e = (unsigned long 
*) &(linear_pg_table[ va>>PAGE_SHIFT]); + //spl1e = (unsigned long *) &(shadow_linear_pg_table[ va>>PAGE_SHIFT]); - gpl1e = map_domain_mem( g2<> PAGE_SHIFT; + gpfn = gptbase >> PAGE_SHIFT; - if ( ! (__shadow_status(p, gpfn) & PSH_shadowed) ) - { + if ( ! (__shadow_status(p, gpfn) & PSH_shadowed) ) + { printk("%s-PT %08lx not shadowed\n", s, gptbase); if( __shadow_status(p, gpfn) != 0 ) BUG(); return 0; - } + } spfn = __shadow_status(p, gpfn) & PSH_pfn_mask; - if ( ! __shadow_status(p, gpfn) == (PSH_shadowed | spfn) ) + if ( ! __shadow_status(p, gpfn) == (PSH_shadowed | spfn) ) FAILPT("ptbase shadow inconsistent1"); - gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT ); - spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT ); + gpl2e = (l2_pgentry_t *) map_domain_mem( gpfn << PAGE_SHIFT ); + spl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT ); - //ipl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT ); + //ipl2e = (l2_pgentry_t *) map_domain_mem( spfn << PAGE_SHIFT ); - if ( memcmp( &spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], - &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], - ((SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT))-DOMAIN_ENTRIES_PER_L2_PAGETABLE) - * sizeof(l2_pgentry_t)) ) - { + if ( memcmp( &spl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], + &gpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], + ((SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT))-DOMAIN_ENTRIES_PER_L2_PAGETABLE) + * sizeof(l2_pgentry_t)) ) + { printk("gpfn=%08lx spfn=%08lx\n", gpfn, spfn); for (i=DOMAIN_ENTRIES_PER_L2_PAGETABLE; i<(SH_LINEAR_PT_VIRT_START>>(L2_PAGETABLE_SHIFT)); @@ -895,54 +946,54 @@ int check_pagetable( struct mm_struct *m, pagetable_t pt, char *s ) printk("+++ (%d) %08lx %08lx\n",i, l2_pgentry_val(gpl2e[i]), l2_pgentry_val(spl2e[i]) ); FAILPT("hypervisor entries inconsistent"); - } + } - if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) != + if ( (l2_pgentry_val(spl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) != 
l2_pgentry_val(gpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])) ) FAILPT("hypervisor linear map inconsistent"); - if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) != + if ( (l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]) != ((spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR)) ) FAILPT("hypervisor shadow linear map inconsistent %08lx %08lx", l2_pgentry_val(spl2e[SH_LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT]), - (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR - ); + (spfn << PAGE_SHIFT) | __PAGE_HYPERVISOR + ); - if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) != + if ( (l2_pgentry_val(spl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT]) != ((__pa(frame_table[gpfn].u.domain->mm.perdomain_pt) | __PAGE_HYPERVISOR))) ) FAILPT("hypervisor per-domain map inconsistent"); - // check the whole L2 - for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) - { + // check the whole L2 + for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) + { unsigned long gpte = l2_pgentry_val(gpl2e[i]); unsigned long spte = l2_pgentry_val(spl2e[i]); check_pte( p, gpte, spte, 2, i ); - } + } - // go back and recurse - for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) - { + // go back and recurse + for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) + { unsigned long gpte = l2_pgentry_val(gpl2e[i]); unsigned long spte = l2_pgentry_val(spl2e[i]); if ( spte ) check_l1_table( p, - i<>PAGE_SHIFT, spte>>PAGE_SHIFT ); + i<>PAGE_SHIFT, spte>>PAGE_SHIFT ); - } + } - unmap_domain_mem( spl2e ); - unmap_domain_mem( gpl2e ); + unmap_domain_mem( spl2e ); + unmap_domain_mem( gpl2e ); - SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n", - sh_l2_present, sh_l1_present ); + SH_VVLOG("PT verified : l2_present = %d, l1_present = %d\n", + sh_l2_present, sh_l1_present ); - return 1; + return 1; } diff --git a/xen/include/asm-i386/processor.h b/xen/include/asm-i386/processor.h index b7aa895bfd..57990d560e 100644 --- 
a/xen/include/asm-i386/processor.h +++ b/xen/include/asm-i386/processor.h @@ -427,6 +427,7 @@ struct mm_struct { struct shadow_status *shadow_ht_free; struct shadow_status *shadow_ht_extras; // extra allocation units unsigned int *shadow_dirty_bitmap; + unsigned int shadow_dirty_bitmap_size; // in pages, bit per page unsigned int shadow_page_count; unsigned int shadow_max_page_count; unsigned int shadow_extras_count;